import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import graph_objects as go
from plotly import express as px
from plotly.offline import init_notebook_mode, iplot
# Read the raw sales table and check which columns contain missing values.
Games = pd.read_csv("vgsales.csv")
Games.isna().any()

# Inspect the rows that have no release year.
bool_series = Games["Year"].isna()
Games.loc[bool_series].count()
Games.loc[bool_series].head(50)

# Inspect the rows that have no publisher.
bool_series = Games["Publisher"].isna()
Games.loc[bool_series].count()
Games.loc[bool_series]

# Everything below works on rows without any missing value.
games = Games.dropna()

# Sorted array of the distinct release years, with any NaN entry masked out.
years_with_nan = games.Year.unique()
nan_array = np.isnan(years_with_nan)
not_nan_array = np.logical_not(nan_array)
years = np.sort(years_with_nan[not_nan_array])
years

# Games released per year: the years and their counts as two parallel lists.
_games_per_year = games.Year.value_counts()
years_by_numb_of_games_values = _games_per_year.index.tolist()
years_by_numb_of_games_counts = _games_per_year.tolist()
_games_per_year
games.Publisher
# Top 10 publishers ranked by how many games they have in the data set.
publisher_game_counts = games.Publisher.value_counts()
tops_publisher = publisher_game_counts.head(10)
game_count_axis_labels = {
    "value": "Number of Games Publishing",
    "index": "Name of the Publisher",
}
px.bar(
    tops_publisher,
    title="Top 10 Video Game Publishers According To Game Amount",
    labels=game_count_axis_labels,
)
# Top 10 publishers according to global game sales.
top10_sales_Publisher = []
len(games.Publisher.unique())

# Total sales per publisher for every region.
# "Name" is deliberately not selected: summing a text column only
# concatenates game titles (pandas >= 2.0 no longer drops non-numeric
# columns from a groupby sum by default).
sales_columns = ["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]
publisher_sales = games.loc[:, ["Publisher"] + sales_columns].groupby(by="Publisher").sum()

# Ascending sort + tail(10) gives the ten largest; reverse so the best is first.
max_gsales_publisher = publisher_sales.sort_values(["Global_Sales"]).tail(10)
tops_publisher_gsales = max_gsales_publisher.iloc[::-1]
tops_publisher_gsales

# The bars show summed sales, so the value axis is labelled as sales.
# Both "index" and "Publisher" are mapped because plotly uses the index
# name for the x axis when the index has one.
px.bar(
    tops_publisher_gsales["Global_Sales"],
    title="Top 10 Video Game Publishers According To Global Game Sales",
    labels={
        "value": "Total Sales In Millions",
        "index": "Name of the Publisher",
        "Publisher": "Name of the Publisher",
    },
)
# Top 10 publishers according to game sales in Japan.
# Ascending sort + tail(10) gives the ten largest; reverse so the best is first.
max_jsales_publisher = publisher_sales.sort_values(["JP_Sales"]).tail(10)
tops_publisher_jsales = max_jsales_publisher.iloc[::-1]
tops_publisher_jsales

# The bars show summed sales, so the value axis is labelled as sales.
# Both "index" and "Publisher" are mapped because plotly uses the index
# name for the x axis when the index has one.
px.bar(
    tops_publisher_jsales["JP_Sales"],
    title="Top 10 Video Game Publishers According To Game Sales in Japan",
    labels={
        "value": "Total Sales In Millions",
        "index": "Name of the Publisher",
        "Publisher": "Name of the Publisher",
    },
)
# Top 10 publishers according to game sales in Europe.
# Ascending sort + tail(10) gives the ten largest; reverse so the best is first.
max_esales_publisher = publisher_sales.sort_values(["EU_Sales"]).tail(10)
tops_publisher_esales = max_esales_publisher.iloc[::-1]
tops_publisher_esales

# The bars show summed sales, so the value axis is labelled as sales.
# Both "index" and "Publisher" are mapped because plotly uses the index
# name for the x axis when the index has one.
px.bar(
    tops_publisher_esales["EU_Sales"],
    title="Top 10 Video Game Publishers According To Game Sales in Europe",
    labels={
        "value": "Total Sales In Millions",
        "index": "Name of the Publisher",
        "Publisher": "Name of the Publisher",
    },
)
# Top 10 publishers according to game sales in North America.
# Ascending sort + tail(10) gives the ten largest; reverse so the best is first.
max_nsales_publisher = publisher_sales.sort_values(["NA_Sales"]).tail(10)
tops_publisher_nsales = max_nsales_publisher.iloc[::-1]
tops_publisher_nsales

# The bars show summed sales, so the value axis is labelled as sales.
# Both "index" and "Publisher" are mapped because plotly uses the index
# name for the x axis when the index has one.
px.bar(
    tops_publisher_nsales["NA_Sales"],
    title="Top 10 Video Game Publishers According To Game Sales in North America",
    labels={
        "value": "Total Sales In Millions",
        "index": "Name of the Publisher",
        "Publisher": "Name of the Publisher",
    },
)
# Top 10 publishers according to game sales in other regions.
# Ascending sort + tail(10) gives the ten largest; reverse so the best is first.
max_osales_publisher = publisher_sales.sort_values(["Other_Sales"]).tail(10)
tops_publisher_osales = max_osales_publisher.iloc[::-1]
tops_publisher_osales

# The bars show summed sales, so the value axis is labelled as sales.
# Both "index" and "Publisher" are mapped because plotly uses the index
# name for the x axis when the index has one.
px.bar(
    tops_publisher_osales["Other_Sales"],
    title="Top 10 Video Game Publishers According To Game Sales in Other Regions",
    labels={
        "value": "Total Sales In Millions",
        "index": "Name of the Publisher",
        "Publisher": "Name of the Publisher",
    },
)
# Global sales according to years: the two best-selling publishers of every year.
figs = []
game_sales_inyears = games.groupby(['Year', 'Publisher'], as_index=False)['Global_Sales'].sum()

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top2 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="Global_Sales", ascending=False)
    .head(2)
    for year in years
]
top_gallsales_inyears = pd.concat(yearly_top2, ignore_index=True)
top_gallsales_inyears

# One stepped line per publisher that made the yearly top 2 at least once.
publishers = top_gallsales_inyears.Publisher.unique()
for publisher in publishers:
    publisher_rows = top_gallsales_inyears[top_gallsales_inyears.Publisher == publisher]
    # x is the set of years this publisher actually appears in; pairing the
    # values with the full `years` array would shift them onto wrong years.
    figs.append(go.Scatter(x=publisher_rows.Year, y=publisher_rows.Global_Sales,
                           name=publisher, line_shape="vh"))
layout = dict(title="Year Wise Global Game Sales According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)

# Bubble chart of the same table: sales on x, year on y, bubble size = sales.
fig = px.scatter(top_gallsales_inyears, x="Global_Sales", y="Year", color="Publisher", size="Global_Sales",
                 hover_data=["Publisher"], title="All Year Wise Global Game Sales In Millions According to Years",
                 labels={"x": "Global_Sales", "y": "Year"})
fig.show()
# Top 5 publishers by global sales for each year in years[20:31].
figs = []

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top5 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="Global_Sales", ascending=False)
    .head(5)
    for year in years[20:31]
]
top_gsales_inyears = pd.concat(yearly_top5, ignore_index=True)

# One stepped line per publisher that made a yearly top 5 at least once.
publishers = top_gsales_inyears.Publisher.unique()
for publisher in publishers:
    publisher_rows = top_gsales_inyears[top_gsales_inyears.Publisher == publisher]
    # x is the set of years this publisher actually appears in; pairing the
    # values with the whole years[20:31] slice would shift them onto wrong years.
    figs.append(go.Scatter(x=publisher_rows.Year, y=publisher_rows.Global_Sales,
                           name=publisher, line_shape="vh"))
layout = dict(title="Year Wise Global Game Sales According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)

# Bubble chart of the same table: sales on x, year on y, bubble size = sales.
fig = px.scatter(top_gsales_inyears, x="Global_Sales", y="Year", color="Publisher", size="Global_Sales",
                 hover_data=["Publisher"], title="Year Wise Global Game Sales In Millions According to Years",
                 labels={"x": "Global_Sales", "y": "Year"})
fig.show()
# Best-selling 2009 game (by global sales) of each publisher in that year's global top 5.
game09 = games[games.Year == 2009]
global_val = top_gsales_inyears.Publisher[top_gsales_inyears.Year == 2009]

# Collect each publisher's best seller(s); ties for the maximum are all kept.
# DataFrame.append was removed in pandas 2.0, so concatenate once at the end.
values = pd.DataFrame()
best_sellers = []
for publisher in global_val:
    values = game09[game09.Publisher == publisher]
    best_sellers.append(values[values.Global_Sales == values.Global_Sales.max()])
result = pd.concat(best_sellers) if best_sellers else pd.DataFrame()

# The title names 2009 because that is the year the data is filtered to.
fig = px.sunburst(result, path=["Name", "Genre", "Publisher"], values=result["Global_Sales"],
                  title="Best Selling Games of the Top 5 Distributors in Global in 2009 and Their Genres")
fig.update_layout(
    grid=dict(columns=2, rows=2),
    margin=dict(t=40, l=2, r=2, b=5))
fig.show()
# Top 5 publishers by sales in Japan for each year in years[20:31].
figs = []
game_sales_inyears = games.groupby(['Year', 'Publisher'], as_index=False)['JP_Sales'].sum()

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top5 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="JP_Sales", ascending=False)
    .head(5)
    for year in years[20:31]
]
top_jsales_inyears = pd.concat(yearly_top5, ignore_index=True)

# One stepped line per publisher that made a yearly top 5 at least once.
publishers = top_jsales_inyears.Publisher.unique()
for publisher in publishers:
    publisher_rows = top_jsales_inyears[top_jsales_inyears.Publisher == publisher]
    # x is the set of years this publisher actually appears in; pairing the
    # values with the whole years[20:31] slice would shift them onto wrong years.
    figs.append(go.Scatter(x=publisher_rows.Year, y=publisher_rows.JP_Sales,
                           name=publisher, line_shape="vh"))
layout = dict(title="Year Wise Game Sales in Japan According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)

# Bubble chart of the same table: sales on x, year on y, bubble size = sales.
fig = px.scatter(top_jsales_inyears, x="JP_Sales", y="Year", color="Publisher", size="JP_Sales",
                 hover_data=["Publisher"], title="Year Wise Game Sales In Japan In Millions According to Years",
                 labels={"x": "JP_Sales", "y": "Year"})
fig.show()
# Best-selling 2009 game (by sales in Japan) of each publisher in that year's Japan top 5.
game09 = games[games.Year == 2009]
japan_val = top_jsales_inyears.Publisher[top_jsales_inyears.Year == 2009]

# Collect each publisher's best seller(s); ties for the maximum are all kept.
# DataFrame.append was removed in pandas 2.0, so concatenate once at the end.
values = pd.DataFrame()
best_sellers = []
for publisher in japan_val:
    values = game09[game09.Publisher == publisher]
    best_sellers.append(values[values.JP_Sales == values.JP_Sales.max()])
result = pd.concat(best_sellers) if best_sellers else pd.DataFrame()

# The title names 2009 because that is the year the data is filtered to.
fig = px.sunburst(result, path=["Name", "Genre", "Publisher"], values=result["JP_Sales"],
                  title="Best Selling Games of the Top 5 Distributors in Japan in 2009 and Their Genres")
fig.update_layout(
    grid=dict(columns=2, rows=2),
    margin=dict(t=40, l=2, r=2, b=5))
fig.show()
# Top 5 publishers by sales in Europe for each year in years[20:31].
figs = []
game_sales_inyears = games.groupby(['Year', 'Publisher'], as_index=False)['EU_Sales'].sum()

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top5 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="EU_Sales", ascending=False)
    .head(5)
    for year in years[20:31]
]
top_eusales_inyears = pd.concat(yearly_top5, ignore_index=True)

# One stepped line per publisher that made a yearly top 5 at least once.
publishers = top_eusales_inyears.Publisher.unique()
for publisher in publishers:
    publisher_rows = top_eusales_inyears[top_eusales_inyears.Publisher == publisher]
    # x is the set of years this publisher actually appears in; pairing the
    # values with the whole years[20:31] slice would shift them onto wrong years.
    figs.append(go.Scatter(x=publisher_rows.Year, y=publisher_rows.EU_Sales,
                           name=publisher, line_shape="vh"))
layout = dict(title="Year Wise Game Sales in Europe According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)

# Bubble chart of the same table: sales on x, year on y, bubble size = sales.
fig = px.scatter(top_eusales_inyears, x="EU_Sales", y="Year", color="Publisher", size="EU_Sales",
                 hover_data=["Publisher"], title="Year Wise Game Sales In Europe In Millions According to Years",
                 labels={"x": "EU_Sales", "y": "Year"})
fig.show()
# Best-selling 2009 game (by sales in Europe) of each publisher in that year's Europe top 5.
game09 = games[games.Year == 2009]
eu_val = top_eusales_inyears.Publisher[top_eusales_inyears.Year == 2009]

# Collect each publisher's best seller(s); ties for the maximum are all kept.
# DataFrame.append was removed in pandas 2.0, so concatenate once at the end.
values = pd.DataFrame()
best_sellers = []
for publisher in eu_val:
    values = game09[game09.Publisher == publisher]
    best_sellers.append(values[values.EU_Sales == values.EU_Sales.max()])
result = pd.concat(best_sellers) if best_sellers else pd.DataFrame()

# The title names 2009 because that is the year the data is filtered to.
fig = px.sunburst(result, path=["Name", "Genre", "Publisher"], values=result["EU_Sales"],
                  title="Best Selling Games of the Top 5 Distributors in Europe in 2009 and Their Genres")
fig.update_layout(
    grid=dict(columns=2, rows=2),
    margin=dict(t=40, l=2, r=2, b=5))
fig.show()
# Top 5 publishers by sales in North America for each year in years[20:31].
figs = []
game_sales_inyears = games.groupby(['Year', 'Publisher'], as_index=False)['NA_Sales'].sum()

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top5 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="NA_Sales", ascending=False)
    .head(5)
    for year in years[20:31]
]
top_nasales_inyears = pd.concat(yearly_top5, ignore_index=True)

# One stepped line per publisher that made a yearly top 5 at least once.
publishers = top_nasales_inyears.Publisher.unique()
for publisher in publishers:
    publisher_rows = top_nasales_inyears[top_nasales_inyears.Publisher == publisher]
    # x is the set of years this publisher actually appears in; pairing the
    # values with the whole years[20:31] slice would shift them onto wrong years.
    figs.append(go.Scatter(x=publisher_rows.Year, y=publisher_rows.NA_Sales,
                           name=publisher, line_shape="vh"))
layout = dict(title="Year Wise Game Sales in North America According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)

# Bubble chart of the same table: sales on x, year on y, bubble size = sales.
fig = px.scatter(top_nasales_inyears, x="NA_Sales", y="Year", color="Publisher", size="NA_Sales",
                 hover_data=["Publisher"], title="Year Wise Game Sales In North America In Millions According to Years",
                 labels={"x": "NA_Sales", "y": "Year"})
fig.show()
# Best-selling 2009 game (by sales in North America) of each publisher in
# that year's North America top 5.
game09 = games[games.Year == 2009]
na_val = top_nasales_inyears.Publisher[top_nasales_inyears.Year == 2009]

# Iterate the North America publishers (na_val); looking the names up in the
# Europe list (eu_val) would chart the wrong publishers.
# Ties for the maximum are all kept. DataFrame.append was removed in
# pandas 2.0, so concatenate once at the end.
values = pd.DataFrame()
best_sellers = []
for publisher in na_val:
    values = game09[game09.Publisher == publisher]
    best_sellers.append(values[values.NA_Sales == values.NA_Sales.max()])
result = pd.concat(best_sellers) if best_sellers else pd.DataFrame()

# The title names 2009 because that is the year the data is filtered to.
fig = px.sunburst(result, path=["Name", "Genre", "Publisher"], values=result["NA_Sales"],
                  title="Best Selling Games of the Top 5 Distributors in North America in 2009 and Their Genres")
fig.update_layout(
    grid=dict(columns=2, rows=2),
    margin=dict(t=40, l=2, r=2, b=5))
fig.show()
# Top 5 publishers by sales in other regions for each year in years[20:31].
figs = []
game_sales_inyears = games.loc[:, ["Year", "Publisher", "Other_Sales"]].groupby(['Year', 'Publisher'], as_index=False)['Other_Sales'].sum()

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top5 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="Other_Sales", ascending=False)
    .head(5)
    for year in years[20:31]
]
top_osales_inyears = pd.concat(yearly_top5, ignore_index=True)

# One stepped line per publisher that made a yearly top 5 at least once.
publishers = top_osales_inyears.Publisher.unique()
for publisher in publishers:
    publisher_rows = top_osales_inyears[top_osales_inyears.Publisher == publisher]
    # x is the set of years this publisher actually appears in; pairing the
    # values with the whole years[20:31] slice would shift them onto wrong years.
    figs.append(go.Scatter(x=publisher_rows.Year, y=publisher_rows.Other_Sales,
                           name=publisher, line_shape="vh"))
layout = dict(title="Year Wise Game Sales in Other Regions According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)

# Bubble chart of the same table: sales on x, year on y, bubble size = sales.
fig = px.scatter(top_osales_inyears, x="Other_Sales", y="Year", color="Publisher", size="Other_Sales",
                 hover_data=["Publisher"], title="Year Wise Game Sales In Other Regions In Millions According to Years",
                 labels={"x": "Other_Sales", "y": "Year"})
fig.show()
"""
years_by_numb_of_games_values
years_by_numb_of_games_counts
top_osales_inyears[top_osales_inyears["Year"] == 2011]
"""
# Best-selling 2009 game (by sales in other regions) of each publisher in
# that year's other-regions top 5.
game09 = games[games.Year == 2009]
other_val = top_osales_inyears.Publisher[top_osales_inyears.Year == 2009]

# Collect each publisher's best seller(s); ties for the maximum are all kept.
# DataFrame.append was removed in pandas 2.0, so concatenate once at the end.
values = pd.DataFrame()
best_sellers = []
for publisher in other_val:
    values = game09[game09.Publisher == publisher]
    best_sellers.append(values[values.Other_Sales == values.Other_Sales.max()])
result = pd.concat(best_sellers) if best_sellers else pd.DataFrame()

# The title names 2009 because that is the year the data is filtered to.
fig = px.sunburst(result, path=["Name", "Genre", "Publisher"], values=result["Other_Sales"],
                  title="Best Selling Games of the Top 5 Distributors in Other Regions in 2009 and Their Genres")
fig.update_layout(
    grid=dict(columns=2, rows=2),
    margin=dict(t=40, l=2, r=2, b=5))
fig.show()
# Global sales according to years - the part done during the Zoom meeting.
# Grouped by genre and year: the five best-selling genres of every year.
figs = []
game_sales_inyears = games.groupby(['Year', 'Genre'], as_index=False)['Global_Sales'].sum()

# DataFrame.append was removed in pandas 2.0, so the per-year slices are
# collected in a list and concatenated once.
yearly_top5 = [
    game_sales_inyears[game_sales_inyears["Year"] == year]
    .sort_values(by="Global_Sales", ascending=False)
    .head(5)
    for year in years
]
top_genre_sales_inyears = pd.concat(yearly_top5, ignore_index=True)
top_genre_sales_inyears

# NOTE: `publishers` holds genre names here; the variable name is left
# unchanged in case other cells read it.
publishers = top_genre_sales_inyears.Genre.unique()
for genre in publishers:
    genre_rows = top_genre_sales_inyears[top_genre_sales_inyears.Genre == genre]
    # x is the set of years this genre actually appears in; pairing the
    # values with the full `years` array would shift them onto wrong years.
    figs.append(go.Scatter(x=genre_rows.Year, y=genre_rows.Global_Sales,
                           name=genre, line_shape="vh"))
layout = dict(title="Year Wise Global Game Sales According to Years",
              xaxis=dict(title="Years"), yaxis=dict(title="Total Sales In Millions"))
figure = dict(data=figs, layout=layout)
iplot(figure)
# Rows whose title begins with "Grand Theft Auto" (str.match anchors at the start).
is_gta_title = games["Name"].str.match("Grand Theft Auto", na=False)
GTA = games.loc[is_gta_title]
GTA

# Rows whose title begins with "FIFA".
is_fifa_title = games["Name"].str.match("FIFA", na=False)
FIFA = games.loc[is_fifa_title]
FIFA